Importando bibliotecas que serão utilizadas

library(xts)
library(dplyr)
library(purrr)
library(stats)
library(plotly)
library(janitor)
library(ggplot2)
library(seasonal)
library(forecast)
library(lubridate)
source("./source/multiplot.R")

Carregando a base do Spotify

# Read the TidyTuesday Spotify songs CSV straight from GitHub.
data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-21/spotify_songs.csv"
)

Plotando popularidade de cada um dos gêneros presentes na base

# Mean track popularity per playlist genre. The genre column is converted to a
# factor because it is used both as the x axis and as the fill colour below.
popularity_genre <- data %>%
  dplyr::group_by(playlist_genre) %>%
  dplyr::summarise(popularity = mean(track_popularity)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(playlist_genre = as.factor(playlist_genre))

# One bar per genre, bar height taken directly from the summarised value.
p <- ggplot2::ggplot(
  data = popularity_genre,
  mapping = ggplot2::aes(
    x = playlist_genre,
    y = popularity,
    fill = playlist_genre
  )
) +
  ggplot2::geom_bar(stat = "identity", colour = "black") +
  ggplot2::labs(
    title = "Popularidade dos gêneros",
    y = "Popularidade",
    x = "Gênero"
  )

# Render the static ggplot as an interactive plotly widget.
plotly::ggplotly(p)

Apresentando a duração média das músicas de cada gênero

# Mean track duration per playlist genre, converted from milliseconds to
# seconds. The genre column becomes a factor for the axis and the fill colour.
duration_genre <- data %>%
  dplyr::group_by(playlist_genre) %>%
  dplyr::summarise(duration = mean(duration_ms) / 1000) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(playlist_genre = as.factor(playlist_genre))

# One bar per genre, bar height taken directly from the summarised value.
p <- ggplot2::ggplot(
  data = duration_genre,
  mapping = ggplot2::aes(
    x = playlist_genre,
    y = duration,
    fill = playlist_genre
  )
) +
  ggplot2::geom_bar(stat = "identity", colour = "black") +
  ggplot2::labs(
    title = "Duração média dos gêneros",
    y = "Duração (s)",
    x = "Gênero"
  )

# Render the static ggplot as an interactive plotly widget.
plotly::ggplotly(p)

Correlacionando algumas variáveis da base

#' Rebase a numeric series to an index that starts at 100.
#'
#' The first element is set to 100 and every following element is obtained by
#' chaining the ratio between consecutive observations onto the previous index
#' value: `y[i] = (x[i] / x[i - 1]) * y[i - 1]`.
#'
#' @param x A numeric vector.
#' @return A vector with the same length as `x` holding the chained index.
#'   An empty input is returned unchanged.
calc_indice <- function(x) {
  if (length(x) == 0) {
    return(x)
  }

  y <- x
  y[1] <- 100

  # seq_along(x)[-1] is empty for a single element, whereas 2:length(x) would
  # count down (2, 1) and index outside the vector.
  for (i in seq_along(x)[-1]) {
    y[i] <- (x[i] / x[i - 1]) * y[i - 1]
  }

  y
}


# Per-genre means of the audio features and of popularity (one row per genre),
# with every column then rebased to an index by calc_indice().
# `duration` is summarised once only: the original second entry,
# `duration = mean(duration)`, re-averaged the value already computed here
# and had no effect on the result.
data_correl <- data %>%
  dplyr::group_by(playlist_genre) %>%
  dplyr::summarise(
    duration = mean(duration_ms) / 1000,
    danceability = mean(danceability),
    energy = mean(energy),
    loudness = mean(loudness),
    speechiness = mean(speechiness),
    instrumentalness = mean(instrumentalness),
    popularity = mean(track_popularity)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::select(-playlist_genre) %>%
  purrr::map_df(calc_indice)

# NOTE(review): calc_indice() rescales each column by 100 / (its first value).
# A positive rescaling leaves Pearson correlations untouched, but a column
# whose first value is negative gets its sign flipped, and so do all of its
# correlations. This presumably affects `loudness` if it is measured in dB
# (negative values) -- TODO confirm and consider correlating the raw means.
correl <- stats::cor(data_correl)
##                     duration danceability      energy   loudness speechiness
## duration          1.00000000  -0.80893028 -0.07591468  0.6495955 -0.49575879
## danceability     -0.80893028   1.00000000 -0.33725448 -0.2993752  0.71645160
## energy           -0.07591468  -0.33725448  1.00000000 -0.7522404 -0.52282062
## loudness          0.64959548  -0.29937515 -0.75224044  1.0000000  0.17337482
## speechiness      -0.49575879   0.71645160 -0.52282062  0.1733748  1.00000000
## instrumentalness -0.19666198  -0.01581998  0.76882627 -0.7275554 -0.11602262
## popularity       -0.30723462   0.18557390 -0.39306778  0.1771410  0.04669578
##                  instrumentalness  popularity
## duration              -0.19666198 -0.30723462
## danceability          -0.01581998  0.18557390
## energy                 0.76882627 -0.39306778
## loudness              -0.72755540  0.17714103
## speechiness           -0.11602262  0.04669578
## instrumentalness       1.00000000 -0.77745609
## popularity            -0.77745609  1.00000000

Evolução da popularidade de cada gênero ao longo dos anos

# Mean track popularity per album release year and playlist genre.
data_evolution_popularity <- data %>%
  # The year is read from the first four characters of the release date.
  # as.Date() turns any value that is not a full date into NA, so partial
  # dates (presumably year-only or year-month entries exist in this column --
  # TODO confirm) would otherwise end up in an NA year group.
  dplyr::mutate(
    year = as.numeric(substr(track_album_release_date, 1, 4))
  ) %>%
  dplyr::group_by(year, playlist_genre) %>%
  dplyr::summarise(popularity = mean(track_popularity)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(playlist_genre = as.factor(playlist_genre))